pip install -r requirements.txt
import sqlite3
import pycountry
import datetime
import numpy as np
import pandas as pd
import plotly.express as px
df = pd.read_csv("Data.csv")
df["Cases per 100,000 people"] = (df["Cases"] / (df["Population"] / 100000)).apply(lambda x: round(x,2))
df["Date"] = pd.to_datetime(df["Date"])
df["Deaths per 1000 Cases"] = (df["Deaths"] / df["Cases"] * 1000).apply(lambda x: round(x,2))
df.rename({"Entity": "Country"}, axis=1, inplace=True)
country_rename_dict = {"Bolivia": "Bolivia, Plurinational State of",
"Cape Verde": "Cabo Verde",
"Iran": "Iran, Islamic Republic of",
"Russia": "Russian Federation",
"South Korea": "Korea, Republic of",
"Vietnam": "Viet Nam"}
df["Country"].replace(country_rename_dict, inplace=True)
countries = {}
for country in pycountry.countries:
countries[country.name] = country.alpha_3
df["Country Code"] = df["Country"].apply(lambda x: countries.get(x))
df
df.isna().sum()
df.fillna(0, inplace=True)
country_dim_table_df = df[["Country",
"Country Code",
"Continent",
"Latitude",
"Longitude",
"Average temperature per year",
"Hospital beds per 1000 people",
"Medical doctors per 1000 people",
"GDP/Capita",
"Population",
"Median age",
"Population aged 65 and over (%)"]].drop_duplicates()
country_dim_table_df
case_fact_table_df = df[["Country",
"Date",
"Daily tests",
"Cases",
"Deaths",
"Cases per 100,000 people",
"Deaths per 1000 Cases"]]
case_fact_table_df
conn = sqlite3.connect("sqlite3_db")
c = conn.cursor()
c.execute("CREATE TABLE IF NOT EXISTS country_dim_table ('')")
c.execute("CREATE TABLE IF NOT EXISTS case_fact_table ('')")
conn.commit()
country_dim_table_df.to_sql("country_dim_table", conn, if_exists="replace", index= False)
case_fact_table_df.to_sql("case_fact_table", conn, if_exists="replace", index= False)
df1_cols = ["Date", "Country", "Country Code", "Cases per 100,000 people"]
c.execute("""
SELECT f.[Date],
f.[Country],
d.[Country Code],
f.[Cases per 100,000 people]
FROM case_fact_table f, country_dim_table d
WHERE f.Country = d.Country
""")
df1 = pd.DataFrame(c.fetchall(), columns=df1_cols)
df_cty_cse = df1.groupby("Country").last().reset_index()
df_cty_cse
fig = px.choropleth(df_cty_cse,
title=f"Number of COVID-19 cases per 100,000 people by Country as of {pd.to_datetime(max(df_cty_cse['Date'])).strftime('%d-%b-%Y')}",
locations="Country Code",
hover_name="Country",
color="Cases per 100,000 people",
color_continuous_scale="Reds")
fig.show()
df2_cols = ["Date",
"Deaths per 1000 Cases",
"Medical doctors per 1000 people",
"Country",
"Continent",
"GDP/Capita"]
c.execute("""
SELECT f.[Date],
f.[Deaths per 1000 Cases],
d.[Medical doctors per 1000 people],
d.[Country],
d.[Continent],
d.[GDP/Capita]
FROM case_fact_table f, country_dim_table d
WHERE f.Country = d.Country
""")
df2 = pd.DataFrame(c.fetchall(), columns=df2_cols)
df_cty_dth_cse = df2.groupby("Country")\
.last()\
.sort_values(by="Deaths per 1000 Cases", ascending=True)\
.reset_index()
df_cty_dth_cse
fig = px.bar(df_cty_dth_cse,
x="Deaths per 1000 Cases",
y="Country",
color="GDP/Capita",
orientation="h",
hover_data=["Deaths per 1000 Cases", "Medical doctors per 1000 people"],
height=2000,
title=f"Deaths per 1000 COVID-19 Cases by Country as of {pd.to_datetime(max(df_cty_dth_cse['Date'])).strftime('%d-%b-%Y')} (Bars coloured by GDP per Capita)",
color_continuous_scale="Reds")
fig.show()
df3_cols = ["Date", "Cases", "Country", "Continent"]
c.execute("""
SELECT f.[Date],
f.[Cases],
d.[Country],
d.[Continent]
FROM case_fact_table f, country_dim_table d
WHERE f.Country = d.Country
""")
df_cty_cse_time = pd.DataFrame(c.fetchall(), columns=df3_cols)
df_cty_cse_time
fig = px.line(df_cty_cse_time,
x="Date",
y="Cases",
title="COVID-19 Cases over time by Country (Select continent from dropdown)",
color="Country",
custom_data=["Continent"])
fig.update_layout(updatemenus = [{"buttons": [{"label":"All continents",
"method": "restyle",
"args": [{"visible": True}]}] +
[{"label": t,
"method": "restyle",
"args": [{"visible": [np.unique(t2.customdata)[0] == t for t2 in fig.data]}],
}
for t in df["Continent"].unique()
]}])
fig.show()